#############################################################################
# Figure1.R
# Aim: to replicate Figure 1 in Atsusaka and Stevenson (2021)
# Run time: about 30 seconds
#############################################################################

library(tidyverse)
library(scales)
rm(list=ls())

# PLEASE RUN ALL OF THE FOLLOWING ALTOGETHER
pdf("Figure1.pdf", width=10, height=6.7)
par(mfrow=c(2,3), mar=c(2,2,3,2), oma=c(3.5,4,0,0), mgp=c(3,0.6,0))
set.seed(123)

#############################################################################
# (1) DATA GENERATING PROCESS
#############################################################################
# PARAMETERS
N = 2000        # Sample size
p = 0.15        # Randomization probability
p2 = 0.15       # Anchor randomization probability
inat.vec <- seq(from=0, to=1, by=0.1)       # % INAttentiveness vector
pi.vec <- c(0.05, 0.1, 0.2, 0.3, 0.4, 0.45) # True prevalence


# LOOP OVER THE LEVEL OF pi (OUTER LOOP (pi))
for(k in seq_along(pi.vec)){
pi = pi.vec[k]     # True prevalence FOR INATTENTIVE RESPONSE

# STORAGE OF ESTIMATES
pi.hat.naive <- rep(NA, 11)
naive.low <- rep(NA, 11)
naive.high <- rep(NA, 11)
B <- rep(NA, 11)
pi.hat.bc <- rep(NA, 11)
bc.low <- rep(NA, 11)
bc.high <- rep(NA, 11)
true.prev <- rep(NA, 11)

# LOOP OVER THE LEVEL OF inat.vec (INNER LOOP (inat.vec))
for(i in seq_along(inat.vec)){
gamma = 1 - inat.vec[i]                             # Attentive rate

Attentive = rbinom(n=N, size=1, prob=gamma)         # Attentive R or not

# SENSITIVE QUESTION OF INTERST
StatementA = rbinom(n=N, size=1, prob=pi)           # Sensitive item
StatementB = rbinom(n=N, size=1, prob=p)            # Randomization item
True.res = ifelse(StatementA != StatementB, 0, 1)   # Survey answer

Obs.res = rep(NA, N)
Obs.res[Attentive==1] = True.res[Attentive==1]      # Observed answer for attentive Rs
Obs.res[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                               size=1, prob=0.5)    # Observed answer, otherwise


# NON-SENSITIVE AUXIALLY QUESTION
StatementC = 0                                      # Attention "c"heck item (True prevalence is 0 for everyone)
StatementD = rbinom(n=N, size=1, prob=p2)           # Anchor question
True.res.prime = ifelse(StatementC != StatementD, 0, 1)    # Survey answer in Anchor Question

Obs.res.prime = rep(NA, N)
Obs.res.prime[Attentive==1] = True.res.prime[Attentive==1] # Observed answer for attentive Rs
Obs.res.prime[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                                     size=1, prob=0.5)     # Observed answer, otherwise


dat <- cbind(Obs.res, True.res, Attentive, Obs.res.prime, True.res.prime)
dat <- as_tibble(dat)
head(dat) # See the D

#############################################################################
# (2) NAIVE CROSSWISE ESTIMATOR
#############################################################################

lambda.hat = mean(Obs.res) # Observed proportion of TRUE-TRUE or FALSE-FALSE
pi.hat.naive[i] = (lambda.hat+p-1)/(2*p-1)

pi.hat.naive.var = (pi.hat.naive[i]*(1-pi.hat.naive[i]))/(N-1) +
                   (p*(1-p))/((N-1)*((2*p-1)^2))
pi.hat.naive.sd = sqrt(pi.hat.naive.var)

naive.low[i]  = pi.hat.naive[i]-1.96*pi.hat.naive.sd; naive.low  = ifelse(naive.low > 0,  naive.low, 0)
naive.high[i] = pi.hat.naive[i]+1.96*pi.hat.naive.sd; naive.high = ifelse(naive.high < 1, naive.high, 1)

B[i] = (gamma-1)/(2*gamma)*((lambda.hat-0.5)/(p-0.5))


#############################################################################
# (3) BIAS-CORRECTED ESTIMATOR
#############################################################################

gamma.hat = (mean(Obs.res.prime)-0.5)/(0.5-p2) # Estimated level of inattentiveness
Bias.hat = (1/2)*((lambda.hat-0.5)/(p-0.5)) - (1/(2*gamma.hat))*((lambda.hat-0.5)/(p-0.5))
Bias.hat

pi.hat.bc[i] = pi.hat.naive[i] - Bias.hat    # Bias Correction
pi.hat.bc[i] = min(1, max(pi.hat.bc[i], 0))  # Logical bound restrain


# BOOTSTRAPPING (1000 TIMES) FOR INFERENCE
bs <- NA
for(j in 1:1000){
  index <- sample(1:nrow(dat), size=dim(dat), replace=TRUE)   
  bs.dat <- dat[index,]                                   

  bs.lambda.hat = mean(bs.dat$Obs.res)                    # Observed proportion of YESYES or NONO
  bs.pi.hat.naive = (bs.lambda.hat+p-1)/(2*p-1)
  bs.gamma.hat = (mean(bs.dat$Obs.res.prime)-0.5)/(0.5-p) # Estimated level of inattentiveness
  bs.bias.hat = (1/2)*((bs.lambda.hat-0.5)/(p-0.5)) - (1/(2*bs.gamma.hat))*((bs.lambda.hat-0.5)/(p-0.5))

  bs[j] = bs.pi.hat.naive - bs.bias.hat # Bias Correction within Bootstrapping
  bs[j] = min(1, max(bs[j], 0))         # Logical bound restrain

  }

pi.hat.bc.var = var(bs)
pi.hat.bc.sd = sqrt(pi.hat.bc.var)

bc.low[i]  = quantile(bs, prob=0.025, na.rm=T); bc.low  = ifelse(bc.low > 0,  bc.low, 0)
bc.high[i] = quantile(bs, prob=0.975, na.rm=T); bc.high = ifelse(bc.high < 1, bc.high, 1)

}

#############################################################################
# (4) VISUALIZATION
#############################################################################

inat.vec2 <- inat.vec*100


plot(pi.hat.bc ~ inat.vec2, ylim=c(0,0.6), type="n", las=1,
     ylab="",
     xlab="",
     cex.axis=1.5)

# NAIVE ESTIMATOR
naive_y <- c(naive.low[1], naive.high, rev(naive.low))
naive_x <- c(inat.vec2[1], inat.vec2, rev(inat.vec2))
polygon(naive_x, naive_y, col=alpha(gray(0.9),0.8), border=NA)
abline(h=pi, col="gray50", lwd=2, lty=2)

lines(pi.hat.naive ~ inat.vec2, lwd=3, col=alpha("black",1))
lines(pi.hat.bc ~ inat.vec2, lwd=1, col=alpha("firebrick4",0.4))
lines(bc.high ~ inat.vec2, lty=1, lwd=0.5,col=alpha("dimgray",0.4))
lines(bc.low ~ inat.vec2, lty=1, lwd=0.5,col=alpha("dimgray",0.4))

title(substitute(paste(pi, sep=" = ", v), list(v=pi)), cex.main=2, font=2, line=1.2)


# TEXT ANNOTATION
if(pi==0.05){

text(x=80, y=0.22, labels="Bias", font=1, cex=1.8, col="navy")
arrows(x0=80, y0=0.26, x1=80, y1=0.36, length=0.1, angle=30, col="navy", lwd=2)
arrows(x0=80, y0=0.17, x1=80, y1=0.06, length=0, angle=30, col="navy", lwd=2)
text(x=48, y=0.52, labels="Conventional estimator", font=1, cex=1.8)
txcol="dimgray"
text(x=15, y=0.4, labels= bquote("n" == 2000), cex=1.5, col=txcol)
text(x=10, y=0.35, labels= bquote(paste(pi,"'") == 0), cex=1.5, col=txcol)
text(x=14, y=0.3, labels= bquote("p" == 0.15), cex=1.5, col=txcol)
text(x=14.5, y=0.25, labels= bquote("p'" == 0.15), cex=1.5, col=txcol)  

}else if(pi==0.3){
text(x=50, y=0.15, labels="Bias-corrected estimator", 
     font=1, cex=1.8, col=alpha("firebrick4",0.9))


}else{} # DO NOTHING


}

# X and Y-AXIS LABELS
mtext(side=1, line=2, outer=T, font=1, cex=1.5,
      text="Percentage of Inattentive Respondents")
mtext(side=2, line=2, outer=T, font=1, cex=1.5, las=0, 
      text="Prevalence of Sensitive Attributes")

dev.off() # CLOSE THE PDF FILE


#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################